options(scipen = 999)
library(readr)
library(readxl)
library(stringr)
library(dplyr)
library(writexl)

d <- read.csv("~/Bases de datos/EMPAQUETAMIENTO_MOVIL.csv", sep = ";", dec = ",")

d$mes <- (d$TRIMESTRE - 1) * 3 + d$MES_DEL_TRIMESTRE
d$t   <- paste0(d$ANNO, "-", sprintf("%02d", d$mes))

d <- d %>% 
  filter(MODALIDAD_PAGO != "Prepago sin compra" & CANTIDAD_LINEAS > 0 & VALOR_FACTURADO_O_COBRADO > 0) %>%
  mutate(
    EMPRESA = recode(EMPRESA,
      "COMUNICACION CELULAR S A COMCEL S A"              = "COMCEL",
      "COLOMBIA TELECOMUNICACIONES S.A. E.S.P."          = "MOVISTAR",
      "COLOMBIA MOVIL S.A. E.S.P."                       = "TIGO",
      "COLOMBIA MOVIL  S.A ESP"                          = "TIGO",
      "PARTNERS TELECOM COLOMBIA SAS"                    = "PTC",
      "VIRGIN MOBILE COLOMBIA S.A.S."                    = "VIRGIN",
      "EMPRESA DE TELECOMUNICACIONES DE BOGOTA S.A. ESP" = "ETB",
      "ALMACENES EXITO INVERSIONES S.A.S."               = "EXITO",
      "SETROC MOBILE GROUP SAS"                          = "SETROC",
      "AVANTEL S.A.S."                                   = "AVANTEL",
      "AVANTEL S.A.S"                                    = "AVANTEL",
      "LOGISTICA FLASH COLOMBIA S.A.S"                   = "FLASH",
      "LOGISTICA EN TELECOMUNICACIONES S.A.S."           = "FLASH",
      "SUMA MOVIL S.A.S."                                = "SUMA",
      "LOV TELECOMUNICACIONES SAS"                       = "LOV",
      "LIWA S.A.S. E.S.P."                               = "LIWA",
      "PLINTRON COLOMBIA S.A.S"                          = "PLINTRON",
      "PLINTRON COLOMBIA SAS"                            = "PLINTRON",
      "ITEC SOLUTIONS SAS"                               = "ITEC",
      "INSIDENET SAS"                                    = "INSIDENET",
      "CELLVOZ COLOMBIA SERVICIOS INTEGRALES S A E S P"  = "CELLVOZ",
      "CONREDES IM SAS"					 = "CONREDES"
    ),
    serv = ID_SERVICIO_PAQUETE,
    posp = -(ID_MODALIDAD_PAGO - 2) # Variable dummy para Pospago
  )

d <- d %>%
  group_by(ANNO, t, EMPRESA, serv, posp) %>%
  summarise(
    q  = sum(CANTIDAD_LINEAS, na.rm = TRUE), 
    vp = sum(VALOR_FACTURADO_O_COBRADO, na.rm = TRUE),
    p  = vp / (q * 1000),
    .groups = 'drop'
  ) %>%
  group_by(t, serv) %>%
  mutate(
    M = sum(q, na.rm = TRUE),
    s = q / M
  ) %>%
  ungroup()

empresas_relevantes <- d %>% 
  filter(serv == 3) %>%
  group_by(t, EMPRESA) %>%
  summarise(v = sum(s) * 100, .groups = 'drop') %>%
  group_by(EMPRESA) %>%
  summarise(m = mean(v), .groups = 'drop') %>%
  filter(m >= 1) %>%
  pull(EMPRESA) 

d <- d %>% filter(EMPRESA %in% empresas_relevantes)

d <- d %>%
  mutate(
    j = as.numeric(as.factor(paste(EMPRESA, serv, posp, sep = '-'))),
    k = as.numeric(as.factor(paste(EMPRESA, serv, sep = '-')))
  ) %>%
  # z1
  group_by(ANNO, j) %>%
  mutate(z1 = (sum(p) - p) / (n() - 1)) %>%
  # z2
  group_by(ANNO, k) %>%
  mutate(z2 = (sum(p) - p) / (n() - 1)) %>%  
  # z3
  group_by(ANNO, EMPRESA) %>%
  mutate(z3 = (sum(p) - p) / (n() - 1)) %>%
  # z4
  group_by(j) %>%
  mutate(z4 = (sum(p) - p) / (n() - 1)) %>%
  # z5
  group_by(t, EMPRESA) %>%
  mutate(z5 = (sum(p) - p) / (n() - 1)) %>%
  ungroup()

d <- filter(d, serv == 3)

DATA <- d
save(DATA, file = "b. MDATA4.RData")